# Copyright 2026 Danslav Slavenskoj, Lingenic LLC
# License: CC0 1.0 - Public Domain
# https://creativecommons.org/publicdomain/zero/1.0/
# You may use this code for any purpose without attribution.
#
# Spec: https://hsvfile.com
# Repo: https://github.com/LingenicLLC/HSV

package HSV;

use strict;
use warnings;
use Exporter 'import';

our @EXPORT_OK = qw(parse SOH STX ETX EOT SO SI DLE FS GS RS US);

# Control characters
use constant SOH => "\x01";  # Start of Header
use constant STX => "\x02";  # Start of Text (data block)
use constant ETX => "\x03";  # End of Text
use constant EOT => "\x04";  # End of Transmission
use constant SO  => "\x0e";  # Shift Out (start nested)
use constant SI  => "\x0f";  # Shift In (end nested)
use constant DLE => "\x10";  # Data Link Escape (binary mode)
use constant FS  => "\x1c";  # File/Record Separator
use constant GS  => "\x1d";  # Group/Array Separator
use constant RS  => "\x1e";  # Record/Property Separator
use constant US  => "\x1f";  # Unit/Key-Value Separator

# Extract DLE+STX...DLE+ETX binary sections
sub _extract_binary_sections {
    my ($text) = @_;
    my %binaries;
    my $result = '';
    my $placeholder_count = 0;
    my $i = 0;
    my $len = length($text);

    while ($i < $len) {
        if (substr($text, $i, 1) eq DLE && $i + 1 < $len && substr($text, $i + 1, 1) eq STX) {
            my $j = $i + 2;
            my $binary_data = '';

            while ($j < $len) {
                if (substr($text, $j, 1) eq DLE && $j + 1 < $len) {
                    if (substr($text, $j + 1, 1) eq ETX) {
                        my $placeholder = "\0BINARY${placeholder_count}\0";
                        $binary_data =~ s/\Q@{[DLE]}@{[DLE]}\E/DLE/ge;
                        $binaries{$placeholder} = $binary_data;
                        $result .= $placeholder;
                        $placeholder_count++;
                        $i = $j + 2;
                        last;
                    } elsif (substr($text, $j + 1, 1) eq DLE) {
                        $binary_data .= DLE;
                        $j += 2;
                        next;
                    }
                }
                $binary_data .= substr($text, $j, 1);
                $j++;
            }

            if ($j >= $len) {
                $result .= substr($text, $i, 1);
                $i++;
            }
        } else {
            $result .= substr($text, $i, 1);
            $i++;
        }
    }

    return ($result, \%binaries);
}

sub _restore_binaries {
    my ($value, $binaries) = @_;
    for my $placeholder (keys %$binaries) {
        my $data = $binaries->{$placeholder};
        $value =~ s/\Q$placeholder\E/$data/g;
    }
    return $value;
}

sub _split_respecting_nesting {
    my ($text, $sep) = @_;
    my @parts;
    my $current = '';
    my $depth = 0;

    for my $c (split //, $text) {
        if ($c eq SO) {
            $depth++;
            $current .= $c;
        } elsif ($c eq SI) {
            $depth--;
            $current .= $c;
        } elsif ($c eq $sep && $depth == 0) {
            push @parts, $current;
            $current = '';
        } else {
            $current .= $c;
        }
    }

    push @parts, $current if length($current) || @parts;
    return @parts;
}

sub _parse_value {
    my ($value, $binaries) = @_;
    $value = _restore_binaries($value, $binaries);

    # Check for nested structure (SO at start, SI at end)
    if (length($value) >= 2 && substr($value, 0, 1) eq SO && substr($value, -1) eq SI) {
        my $inner = substr($value, 1, -1);
        return _parse_object($inner, $binaries);
    }

    # Check for array
    if (index($value, GS) != -1) {
        my @parts = _split_respecting_nesting($value, GS);
        return [map { _parse_value($_, $binaries) } @parts];
    }

    return $value;
}

sub _parse_object {
    my ($content, $binaries) = @_;
    my %obj;

    my @props = _split_respecting_nesting($content, RS);
    for my $prop (@props) {
        my @parts = _split_respecting_nesting($prop, US);
        if (@parts >= 2) {
            my $k = shift @parts;
            my $v = join(US, @parts);
            $obj{$k} = _parse_value($v, $binaries);
        }
    }

    return \%obj;
}

sub parse {
    my ($text) = @_;

    my ($processed_text, $binaries) = _extract_binary_sections($text);

    my %doc = (
        header  => undef,
        records => []
    );

    my $i = 0;
    my $len = length($processed_text);

    while ($i < $len) {
        my $c = substr($processed_text, $i, 1);

        if ($c eq SOH) {
            my $stx_pos = index($processed_text, STX, $i + 1);
            if ($stx_pos == -1) {
                $i++;
                next;
            }

            my $header_content = substr($processed_text, $i + 1, $stx_pos - $i - 1);
            $doc{header} = _parse_object($header_content, $binaries);

            my $etx_pos = index($processed_text, ETX, $stx_pos + 1);
            if ($etx_pos == -1) {
                $i = $stx_pos + 1;
                next;
            }

            my $data_content = substr($processed_text, $stx_pos + 1, $etx_pos - $stx_pos - 1);
            for my $record (_split_respecting_nesting($data_content, FS)) {
                my $obj = _parse_object($record, $binaries);
                push @{$doc{records}}, $obj if %$obj;
            }

            $i = $etx_pos + 1;
        } elsif ($c eq STX) {
            my $etx_pos = index($processed_text, ETX, $i + 1);
            if ($etx_pos == -1) {
                $i++;
                next;
            }

            my $data_content = substr($processed_text, $i + 1, $etx_pos - $i - 1);
            for my $record (_split_respecting_nesting($data_content, FS)) {
                my $obj = _parse_object($record, $binaries);
                push @{$doc{records}}, $obj if %$obj;
            }

            $i = $etx_pos + 1;
        } else {
            $i++;
        }
    }

    return \%doc;
}

1;

__END__

=head1 NAME

HSV - Hierarchical Separated Values parser

=head1 SYNOPSIS

    use HSV qw(parse);

    my $doc = parse("\x02name\x1fAlice\x1eage\x1f30\x03");
    print $doc->{records}[0]{name};  # Alice

=head1 DESCRIPTION

HSV is a text-based file format and streaming protocol using ASCII control
characters. Unlimited nesting (like JSON). No escaping required.

=head1 LICENSE

CC0 1.0 - Public Domain

=cut
